# data_preparation.py

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

def load_and_preprocess_data(
    *,
    holdout_size: float = 0.2,
    test_share: float = 0.5,
    random_state: int = 42,
    stratify: bool = False,
):
    """Load MNIST from OpenML, rescale the pixels, and split three ways.

    The ``mnist_784`` dataset (version 1) is fetched with ``fetch_openml``,
    the features are cast to float and divided by 255.0, and the labels are
    cast to int. The data is then split twice: first into a training part
    and a hold-out part, then the hold-out part into validation and test.

    With the default arguments the result is an 80% / 10% / 10% split that
    matches the previously hard-coded behaviour.

    Args:
        holdout_size: Passed as ``test_size`` to the first split; the share
            of all samples set aside for validation and test together.
        test_share: Passed as ``test_size`` to the second split; the share
            of the hold-out part that becomes the test set (the remainder
            becomes the validation set).
        random_state: Seed handed to both ``train_test_split`` calls so the
            splits are reproducible.
        stratify: When true, both splits are stratified on the labels so
            each subset keeps the overall class proportions. Off by default
            to preserve the original (unstratified) splits.

    Returns:
        A tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
        NOTE(review): the container type (NumPy array vs. pandas object)
        is whatever ``fetch_openml`` returns for the installed scikit-learn
        version -- confirm if downstream code depends on it.
    """
    # Load the MNIST dataset.
    mnist = fetch_openml('mnist_784', version=1)
    X = mnist['data'].astype(float)
    y = mnist['target'].astype(int)

    # Scale pixel values by 1/255 (intended range [0, 1]). The in-place
    # division touches only the object created by ``astype`` just above.
    X /= 255.0

    # First split: training set vs. combined validation/test hold-out.
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        X,
        y,
        test_size=holdout_size,
        random_state=random_state,
        stratify=y if stratify else None,
    )

    # Second split: divide the hold-out part into validation and test.
    X_val, X_test, y_val, y_test = train_test_split(
        X_holdout,
        y_holdout,
        test_size=test_share,
        random_state=random_state,
        stratify=y_holdout if stratify else None,
    )

    return X_train, X_val, X_test, y_train, y_val, y_test
